#################################################################################
# Replication file for:                                                         #
# "Balancing Precision and Retention in Experimental Design"                    #
#                                                                               #
# Gustavo Diaz                                                                  #
# Northwestern University                                                       #
# gustavo.diaz@northwestern.edu                                                 #
#                                                                               #
# Erin L. Rossiter                                                              #
# University of Notre Dame                                                      #
# erossite@nd.edu                                                               #
#                                                                               #
# This file produces Appendix Figure F8.                                        #                                          
#################################################################################

# Setup -----

# ggplot global options
theme_set(theme_bw(base_size = 20))

# Data -----

study1 <- readRDS("data/processed_data/diagnosis_study1.RDS")
study2 <- readRDS("data/processed_data/diagnosis_study2.RDS")
study3 <- readRDS("data/processed_data/diagnosis_study3.RDS")
study4 <- readRDS("data/processed_data/diagnosis_study4.RDS")
study5 <- readRDS("data/processed_data/diagnosis_study5.RDS")
study6 <- readRDS("data/processed_data/diagnosis_study6.RDS")
sims <- bind_rows(study1 %>% mutate(study = "Study 1"),
                  study2 %>% mutate(study = "Study 2"),
                  study3 %>% mutate(study = "Study 3"),
                  study4 %>% mutate(study = "Study 4"),
                  study5 %>% mutate(study = "Study 5"),
                  study6 %>% mutate(study = "Study 6")
)

# Standard design -- use only when 'drop == 0' for comparison
std = sims %>% 
  filter(estimator == "Complete + Post-only" & drop == 0) %>%
  select(study, corr, std_est = estimate, sim_ID)
nrow(std)

# Left join the relevant standard design per simulation iteration
test_df = left_join(sims, std, by = c("study", "corr",  "sim_ID"))

# Prepare test statistics ----
tests = test_df %>% 
  filter(estimator != "Complete + Post-only") %>% 
  group_by(study, estimator, corr, drop) %>% 
  summarize(f = var.test(x = estimate, y = std_est)$statistic,
            p_f = var.test(x = estimate, y = std_est)$p.value,
            .groups = "drop")


# Arrange factors for plot
tests <- tests %>%
  mutate(
    estimator = factor(estimator, levels = c("Complete + Pre-post",
                                             "Block one + Post-only",
                                             "Block one + Pre-post",
                                             "Block all + Post-only",
                                             "Block all + Pre-post")),
    corr = as.factor(corr))

# F-test -----
f_plot <- ggplot(tests) +
  aes(x = drop, y = f, group = 1, color = corr, shape = p_f < 0.05) +
  facet_grid(study ~ estimator) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  geom_line(aes(group = corr)) +
  geom_point(size = 2, position = position_dodge2(width = 0.03)) +
  scale_shape_manual(values = c(4, 19)) +
  scale_color_manual(values = c("#A0A0A0", "#6D6D6D", "#3D3D3D")) +
  #scale_color_viridis_d() +
  labs(x = "Sample loss",
       y = "Variance ratio\n(alternative/standard)",
       color = "Correlation between pre-treatment variable and Y",
       shape = "p < 0.05") +
  theme_minimal() +
  theme(legend.position = "bottom",
        panel.border = element_rect(color = "black", fill = NA),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        text = element_text(size = 12),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 12),
        legend.direction = "vertical") +
  guides(color = guide_legend(order = 1, ncol = 5),
         shape = guide_legend(order = 2, ncol = 2))

# save
ggsave(plot = f_plot, file = "figures/figF8.pdf", width = 10, height = 10)